home *** CD-ROM | disk | FTP | other *** search
- unit broken;
- {.$DEFINE REGISTER}
- {$I-}
- interface
- uses
- Windows, WinINet, Classes;
-
- const
- BufSize = 256*1024;
-
- type
- TCheck = function(URL: ShortString; Depth: Integer): Integer of object;
- TUpdate= procedure of object;
-
- TBrokenLink = class(TComponent)
- private
- Internet: HInternet;
- HttpHandle: HInternet;
- HttpRequest: HInternet;
- private
- Server: ShortString; { www.drbob42.com }
- Buffer: String; { of BufSize }
- ExecuteCGI: Boolean; { execute *.exe/*.asp? }
- protected
- FChecking: TStringList;
- FChecked: TStringList;
- FSuspect: TStringList; { non '.htm' files }
- FBroken: TStringList;
- FMailTo: TStringList;
- FNews: TStringList;
- FHTTP: TStringList;
- FFTP: TStringList;
- protected
- FInterrupted: Boolean;
- FOnUpdate: TUpdate;
- protected
- function CheckURLinFile(const URL,FileName: ShortString; Depth: Integer;
- HTTP: Boolean; CheckCallback: TCheck): Boolean;
- protected
- function LCheck(URL: ShortString; Depth: Integer): Integer;
- function NCheck(URL: ShortString; Depth: Integer): Integer;
- public
- function LCheckURL(URL: ShortString; Depth: Integer): Boolean;
- function NCheckURL(const URL: ShortString; CGI: Boolean; Depth: Integer): Boolean;
- public
- constructor Create(AOwner: TComponent); override;
- destructor Destroy; override;
- published
- property Checking: TStringList read FChecking;
- property Checked: TStringList read FChecked;
- property Suspect: TStringList read FSuspect;
- property Broken: TStringList read FBroken;
- property MailTo: TStringList read FMailTo;
- property News: TStringList read FNews;
- property HTTP: TStringList read FHTTP;
- property FTP: TStringList read FFTP;
- published
- property Interrupted: Boolean read FInterrupted write FInterrupted;
- property OnUpdate: TUpdate read FOnUpdate write FOnUpdate;
- end;
-
- {$IFDEF REGISTER}
- procedure register;
- {$ENDIF}
-
- implementation
- uses
- SysUtils;
-
- constructor TBrokenLink.Create(AOwner: TComponent);
- begin
- inherited Create(AOwner);
- FChecking := TStringList.Create;
- FChecked := TStringList.Create;
- FChecked.Sorted := True;
- FSuspect := TStringList.Create;
- FSuspect.Sorted := True;
- FBroken := TStringList.Create;
- FBroken.Sorted := True;
- FMailTo := TStringList.Create;
- FMailTo.Sorted := True;
- FNews := TStringList.Create;
- FNews.Sorted := True;
- FHTTP := TStringList.Create;
- FHTTP.Sorted := True;
- FFTP := TStringList.Create;
- FFTP.Sorted := True;
- SetLength(Buffer,BufSize);
- end;
-
- destructor TBrokenLink.Destroy;
- begin
- Buffer := '';
- FChecking.Free;
- FChecked.Free;
- FSuspect.Free;
- FBroken.Free;
- FMailTo.Free;
- FNews.Free;
- FHTTP.Free;
- FFTP.Free;
- inherited Destroy
- end;
-
- function TBrokenLink.CheckURLinFile(const URL,FileName: ShortString; Depth: Integer;
- HTTP: Boolean; CheckCallback: TCheck): Boolean;
- var
- f: System.Text;
- OldURL,NewURL: ShortString;
- Upper,Str: String;
- i: Integer;
- begin
- {$IFDEF DEBUG}
- writeln('CheckURLinFile ',FileName);
- {$ENDIF}
- System.Assign(f,FileName);
- Reset(f);
- Result := IOResult = 0;
- if Result then
- begin
- if HTTP then readln(f,OldURL)
- else OldURL := FileName;
- while not eof(f) do
- begin
- readln(f,Str);
- SetLength(Upper,Length(Str));
- Upper := UpperCase(Str);
- while Pos('<A HREF="',Upper) > 0 do
- begin
- Delete(Str,1,Pos('<A HREF="',Upper)+8);
- Delete(Upper,1,Pos('<A HREF="',Upper)+8);
- while (Length(Upper) > 0) and (Upper[1] = ' ') do Delete(Upper,1,1);
- while (Length(Str) > 0) and (Str[1] = ' ') do Delete(Str,1,1);
- if (Pos('#',Upper) <> 1) and
- (Pos('MAILTO:',Upper) <> 1) and
- (Pos('NEWS:',Upper) <> 1) and
- (Pos('FTP://',Upper) <> 1) then { skip mailto/news/ftp }
- begin
- if Pos('FILE:///',Upper) = 1 then
- begin
- Delete(Str,1,8);
- Delete(Upper,1,8)
- end;
- if Pos('#',Upper) in [1..Pos('"',Upper)] then Upper[Pos('#',Upper)] := '"';
- if Pos('#',Str) in [1..Pos('"',Str)] then Str[Pos('#',Str)] := '"';
- if not ((Pos('://',Upper) in [1..Pos('"',Upper)]) or
- (Pos(':\',Upper) = 2) or (Upper[1] = '/')) then
- Upper := URL + Upper; { path in front }
- if not ((Pos('://',Str) in [1..Pos('"',Str)]) or
- (Pos(':\',Str) = 2) or (Str[1] = '/')) then
- Str := URL + Str; { path in front }
- while Pos('../',Upper) in [1..Pos('"',Upper)] do
- begin
- i := Pos('../',Upper);
- Delete(Upper,i,3);
- repeat
- Dec(i);
- Delete(Upper,i,1)
- until Upper[i-1] = '/'
- end;
- while Pos('../',Str) in [1..Pos('"',Str)] do
- begin
- i := Pos('../',Str);
- Delete(Str,i,3);
- repeat
- Dec(i);
- Delete(Str,i,1)
- until Str[i-1] = '/'
- end;
- while Pos('./',Upper) in [1..Pos('"',Upper)] do
- begin
- i := Pos('./',Upper);
- Delete(Upper,i,2);
- repeat
- Dec(i);
- Delete(Upper,i,1)
- until Upper[i-1] = '/'
- end;
- while Pos('./',Str) in [1..Pos('"',Str)] do
- begin
- i := Pos('./',Str);
- Delete(Str,i,2);
- repeat
- Dec(i);
- Delete(Str,i,1)
- until Str[i-1] = '/'
- end;
- NewURL := Copy(Str,1,Pos('"',Str)-1);
- if (FChecking.IndexOf(NewURL) < 0) then
- begin
- if (not FChecked.Find(NewURL,i)) and
- (not FHTTP.Find(NewURL,i)) and
- (not FSuspect.Find(IntToStr(-i)+': '+OldURL+' => '+NewURL,i)) and
- (not FBroken.Find(IntToStr(-i)+': '+OldURL+' => '+NewURL,i)) then
- if (Pos('HTTP://',Upper) = 1) and not HTTP then
- begin
- {$IFDEF DEBUG}
- writeln('external: ',NewURL);
- {$ENDIF}
- FHTTP.Add(NewURL)
- end
- else
- if not FInterrupted then
- if ExecuteCGI or (Pos('?',Upper) = 0) then
- begin
- i := CheckCallback(NewURL,Depth-1);
- if i <= 0 then
- begin
- if Pos('.htm',NewURL) > 0 then
- begin
- {$IFDEF DEBUG}
- writeln('broken: ',IntToStr(-i)+': '+OldURL+' => '+NewURL);
- {$ENDIF}
- FBroken.Add(IntToStr(-i)+': '+OldURL+' => '+NewURL)
- end
- else
- begin
- {$IFDEF DEBUG}
- writeln('suspect: ',IntToStr(-i)+': '+OldURL+' => '+NewURL);
- {$ENDIF}
- FSuspect.Add(IntToStr(-i)+': '+OldURL+' => '+NewURL)
- end
- end
- else
- begin
- if (not HTTP) or (Pos(Server,NewURL) > 0) then
- begin
- {$IFDEF DEBUG}
- writeln('checked: ',NewURL);
- {$ENDIF}
- FChecked.Add(NewURL)
- end
- else
- begin
- {$IFDEF DEBUG}
- writeln('external: ',NewURL);
- {$ENDIF}
- FHTTP.Add(NewURL)
- end
- end
- end
- end
- end
- else
- begin
- NewURL := Copy(Str,1,Pos('"',Str)-1);
- if Pos('MAILTO:',Upper) = 1 then
- begin
- {$IFDEF DEBUG}
- writeln('mailto: ',NewURL);
- {$ENDIF}
- if not FMailTo.Find(NewURL,i) then FMailTo.Add(NewURL)
- end;
- if Pos('NEWS://',Upper) = 1 then
- begin
- {$IFDEF DEBUG}
- writeln('news: ',NewURL);
- {$ENDIF}
- if not FNews.Find(NewURL,i) then FNews.Add(NewURL)
- end;
- if Pos('FTP://',Upper) = 1 then
- begin
- {$IFDEF DEBUG}
- writeln('ftp: ',NewURL);
- {$ENDIF}
- if not FFTP.Find(NewURL,i) then FFTP.Add(NewURL)
- end
- end;
- if Assigned(FOnUpdate) then OnUpdate
- end
- end;
- System.Close(f);
- if HTTP then System.Erase(f);
- if IOResult <> 0 then { skip }
- end
- end {CheckURLinFile};
-
-
- function TBrokenLink.LCheck(URL: ShortString; Depth: Integer): Integer;
- var
- Path: ShortString;
- SRec: TSearchRec;
- begin
- {$IFDEF DEBUG}
- writeln('checking: ',URL);
- {$ENDIF}
- FChecking.Add(URL);
- if Assigned(FOnUpdate) then OnUpdate;
- if FindFirst(URL,faArchive,SRec) = 0 then
- Result := SRec.Size
- else
- Result := -1;
- FindClose(SRec);
- Path := URL;
- repeat
- Delete(Path,Length(Path),1)
- until (Length(Path) = 0) or (Path[Length(Path)] = '/') or (Path[Length(Path)] = '\');
- if (Result > 0) and not FInterrupted then
- CheckURLinFile(Path,URL,Depth-1,False,LCheck);
- FChecking.Delete(Pred(FChecking.Count));
- if Assigned(FOnUpdate) then OnUpdate
- end {LCheck};
-
- function TBrokenLink.LCheckURL(URL: ShortString; Depth: Integer): Boolean;
- begin
- FInterrupted := False;
- FChecking.Clear;
- FChecked.Clear;
- FSuspect.Clear;
- FBroken.Clear;
- FMailTo.Clear;
- FNews.Clear;
- FHTTP.Clear;
- FFTP.Clear;
- if Pos('file:///',URL) = 1 then Delete(URL,1,8);
- Depth := LCheck(URL,Depth);
- Result := Depth > 0;
- if Result then
- begin
- {$IFDEF DEBUG}
- writeln('checked: ',URL);
- {$ENDIF}
- FChecked.Add(URL)
- end
- else
- begin
- if Pos('.htm',URL) > 0 then
- begin
- {$IFDEF DEBUG}
- writeln('broken: ',IntToStr(Depth)+': -> '+URL);
- {$ENDIF}
- FBroken.Add(IntToStr(Depth)+': -> '+URL)
- end
- else
- begin
- {$IFDEF DEBUG}
- writeln('suspect: ',IntToStr(Depth)+': -> '+URL);
- {$ENDIF}
- FSuspect.Add(IntToStr(Depth)+': -> '+URL)
- end
- end;
- if Assigned(FOnUpdate) then OnUpdate
- end {LCheckURL};
-
-
- function TBrokenLink.NCheck(URL: ShortString; Depth: Integer): Integer;
- const
- Name = 'c:\tmp\dump.%d';
- Ext: Word = 0;
- var
- Index,Size: DWord;
- f: File;
- begin
- if URL[Length(URL)] = '/' then Delete(URL,Length(URL),1); { skip last '/' }
- {$IFDEF DEBUG}
- writeln('checking: ',URL);
- {$ENDIF}
- FChecking.Add(URL);
- if Assigned(FOnUpdate) then OnUpdate;
- URL[Length(URL)+1] := #0; // PChar
- {$IFDEF DEBUG}
- writeln('HttpOpenRequest');
- {$ENDIF}
- HttpRequest := HttpOpenRequest(HttpHandle, nil, @URL[1],
- nil, nil, nil,
- INTERNET_FLAG_RELOAD OR INTERNET_FLAG_EXISTING_CONNECT,
- 0);
- if HttpRequest <> nil then
- begin
- {$IFDEF DEBUG}
- writeln('HttpSendRequest');
- {$ENDIF}
- if HttpSendRequest(HttpRequest, nil, 0, nil, 0) then
- begin
- Index := 0;
- { FillChar(Buffer[1],BufSize,#0); }
- Size := BufSize;
- {$IFDEF DEBUG}
- writeln('HttpQueryInfo');
- {$ENDIF}
- HttpQueryInfo(HttpRequest,1,@Buffer[1],Size,Index)
- end;
- FillChar(Buffer[1],BufSize,#0);
- Size := BufSize;
- {$IFDEF DEBUG}
- writeln('InternetReadFile');
- {$ENDIF}
- if ((Pos('.HTM',UpperCase(URL)) = 0) and (Pos('.ASP',UpperCase(URL)) = 0)) or
- (Pos(Server,URL) = 0) or
- (Depth <= 0) then Size := 512;
- if InternetReadFile(HttpRequest,@Buffer[1],Size,Size) then
- begin
- if Pos('HTTP/1.0 ',Buffer) in [1..255] then { broken }
- try
- Result := - StrToInt(Copy(Buffer,Pos('HTTP/1.0 ',Buffer)+9,3));
- except
- Result := -1
- end
- else Result := Size;
- Inc(Ext);
- System.Assign(f,Format(Name,[Ext]));
- Rewrite(f,1);
- URL := URL+#13+#10;
- BlockWrite(f,URL[1],Length(URL)); { HTTP: first line = old URL }
- BlockWrite(f,Buffer[1],Size);
- System.Close(f);
- repeat
- Delete(URL,Length(URL),1)
- until (Length(URL) = 0) or (URL[Length(URL)] = '/') or (URL[Length(URL)] = '\');
- if (Depth > 0) and (Pos(Server,URL) > 0) and not FInterrupted then
- CheckURLinFile(URL,Format(Name,[Ext]),Depth-1,True,NCheck)
- else { cleanup }
- Erase(f)
- end
- else Result := -2;
- {$IFDEF DEBUG}
- writeln('InternetCloseHandle');
- {$ENDIF}
- InternetCloseHandle(HttpRequest)
- end
- else Result := -4;
- FChecking.Delete(Pred(FChecking.Count));
- if Assigned(FOnUpdate) then OnUpdate
- end {NRequest};
-
- function TBrokenLink.NCheckURL(const URL: ShortString; CGI: Boolean; Depth: Integer): Boolean;
- begin
- Result := True;
- ExecuteCGI := CGI;
- FInterrupted := False;
- FChecking.Clear;
- FChecked.Clear;
- FSuspect.Clear;
- FBroken.Clear;
- FMailTo.Clear;
- FNews.Clear;
- FHTTP.Clear;
- FFTP.Clear;
- Server := URL;
- mkdir('C:\tmp');
- if IOResult <> 0 then { skip };
- if Pos(':',Server) > 0 then
- begin
- Delete(Server,1,Pos(':',Server));
- repeat
- Delete(Server,1,1)
- until Server[1] <> '/'
- end;
- Delete(Server,Pos('/',Server),255);
- Server[Length(Server)+1] := #0;
- {$IFDEF DEBUG}
- writeln('InternetOpen');
- {$ENDIF}
- {$IFDEF VER100}
- Internet := InternetOpen('DrBob', LOCAL_INTERNET_ACCESS, nil, nil, 0);
- {$ELSE}
- Internet := InternetOpen('DrBob', LOCAL_INTERNET_ACCESS, @Server[1], 80, 0);
- {$ENDIF}
- if Internet <> nil then
- try
- {$IFDEF DEBUG}
- writeln('InternetConnect');
- {$ENDIF}
- HttpHandle := InternetConnect(Internet, @Server[1],
- INTERNET_DEFAULT_HTTP_PORT, nil, nil,
- Internet_Service_Http, 0, 0);
- if HttpHandle <> nil then
- try
- Depth := NCheck(URL,Depth);
- Result := Depth > 0;
- if Result then
- begin
- {$IFDEF DEBUG}
- writeln('external: ',URL);
- {$ENDIF}
- FChecked.Add(URL)
- end
- else
- begin
- if Pos('.htm',URL) > 0 then
- begin
- {$IFDEF DEBUG}
- writeln('broken: ',IntToStr(-Depth)+': '+Server+' -> '+URL);
- {$ENDIF}
- FBroken.Add(IntToStr(-Depth)+': '+Server+' -> '+URL)
- end
- else
- begin
- {$IFDEF DEBUG}
- writeln('suspect: ',IntToStr(-Depth)+': '+Server+' -> '+URL);
- {$ENDIF}
- FSuspect.Add(IntToStr(-Depth)+': '+Server+' -> '+URL)
- end
- end;
- if Assigned(FOnUpdate) then OnUpdate;
- finally
- {$IFDEF DEBUG}
- writeln('InternetCloseHandle');
- {$ENDIF}
- InternetCloseHandle(HttpHandle)
- end;
- finally
- {$IFDEF DEBUG}
- writeln('InternetCloseHandle');
- {$ENDIF}
- InternetCloseHandle(Internet)
- end
- end {CheckURL};
-
- {$IFDEF REGISTER}
- procedure register;
- begin
- RegisterComponents('Dr.Bob',[TBrokenLink])
- end;
- {$ENDIF}
-
- end.
-